Litviňuková, M., Talavera-López, C., Maatz, H., Reichart, D., Worth, C.L., Lindberg, E.L., Kanda, M., Polanski, K., Heinig, M., Lee, M., et al. (2020). Cells of the adult human heart. Nature 588, 466–472.

  • BioProject Accession: PRJEB39602
  • ENA Study Accession: ERP123138



Load required packages.

library(tidyverse)
library(magrittr)
library(Matrix)
library(patchwork)
library(extrafont)
library(reticulate)
Sys.time()
## [1] "2021-02-03 20:52:16 CST"


Data preparation


Functions

# Run utilities.R from SCRIPT_DIR.
# NOTE(review): SCRIPT_DIR is not assigned in the visible part of this
# document; it must already exist when this line runs. utilities.R presumably
# defines the helpers used further down (plot_embedding, theme_customized,
# get_middle_points) — confirm.
source(file = file.path(SCRIPT_DIR, "utilities.R"))

# Root directory of the project; the input paths below are built from it.
PROJECT_DIR <- "/Users/jialei/Dropbox/Data/Projects/UTSW/HCM/"

Anndata

Load re-mapped UMI count matrix stored in the h5ad file.

# Import the Python `anndata` module through reticulate; convert = TRUE is
# passed so that Python results are converted to R objects where possible.
ad <- reticulate::import(module = "anndata", convert = TRUE)
# Print the anndata version for the record.
print(ad$`__version__`)
## [1] "0.7.5"
# Open the h5ad file holding the re-mapped UMI count matrix with backed = "r".
# NOTE(review): presumably this keeps the matrix on disk (read-only) instead of
# loading it into memory — confirm against the anndata documentation.
adata <- ad$read_h5ad(
    filename = file.path(
        PROJECT_DIR,
        "raw/public/PRJEB39602/matrices/predefined",
        "adata.h5ad"
    ),
    backed = "r"
)

Metadata

Raw BAM files were downloaded from https://www.ebi.ac.uk/ena/browser/view/PRJEB39602.

# Read the tab-separated file report for PRJEB39602. The columns
# sample_accession, experiment_accession and submitted_ftp are used below.
file_info <- readr::read_delim(
    file = file.path(
        PROJECT_DIR,
        "raw/public/PRJEB39602",
        "filereport_read_run_PRJEB39602_tsv.txt"
    ),
    delim = "\t"
)
## 
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   study_accession = col_character(),
##   sample_accession = col_character(),
##   experiment_accession = col_character(),
##   run_accession = col_character(),
##   tax_id = col_double(),
##   scientific_name = col_character(),
##   fastq_md5 = col_logical(),
##   fastq_ftp = col_logical(),
##   submitted_md5 = col_character(),
##   submitted_ftp = col_character(),
##   sra_md5 = col_logical(),
##   sra_ftp = col_logical()
## )
# Read the comma-separated run table for PRJEB39602. Its `BioSample` and
# `Library Name` columns are used below to attach library names to samples.
cell_metadata_PRJEB39602 <- readr::read_csv(
    file = file.path(PROJECT_DIR, "raw/public/PRJEB39602", "SraRunTable.txt")
)
## 
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   `ENA-FIRST-PUBLIC (run)` = col_date(format = ""),
##   `ENA-FIRST-PUBLIC` = col_date(format = ""),
##   `ENA-LAST-UPDATE (run)` = col_date(format = ""),
##   `ENA-LAST-UPDATE` = col_date(format = ""),
##   `nominal_length (exp)` = col_double(),
##   `nominal_sdev (exp)` = col_double(),
##   ReleaseDate = col_datetime(format = "")
## )
## ℹ Use `spec()` for the full column specifications.
# List the distinct (sample, experiment, file base name) rows of the file
# report and attach the library name recorded in the run table.
file_info %>%
    dplyr::select(sample_accession, experiment_accession, submitted_ftp) %>%
    dplyr::mutate(
        base_name = submitted_ftp %>%
            # drop everything from the first ";" onwards
            stringr::str_remove(";.+$") %>%
            # drop the leading path, up to and including the last "/"
            stringr::str_remove("^.+/") %>%
            # drop everything from the first "_" onwards
            stringr::str_remove("_.+$") %>%
            # match ".bam" literally; as a regex the "." would match any
            # character
            stringr::str_remove(stringr::fixed(".bam"))
    ) %>%
    dplyr::select(-submitted_ftp) %>%
    unique() %>%
    dplyr::left_join(
        cell_metadata_PRJEB39602 %>%
            dplyr::select(
                sample_accession = BioSample, library_name = `Library Name`
            ) %>%
            unique(),
        by = "sample_accession"
    )

Formatting

Load metadata released at the HCA portal.

# Read the tab-separated metadata table downloaded from the HCA portal
# (one of the files kept under docs/Cells_of_the_adult_human_heart).
cell_metadata_hca <- readr::read_delim(
    file = file.path(
        PROJECT_DIR,
        "docs/Cells_of_the_adult_human_heart",
        "HeartSingleCellsAndNucleiSeq_2020-12-21_03.45.tsv"
    ),
    delim = "\t"
)
## 
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   bundle_version = col_datetime(format = ""),
##   cell_suspension.biomaterial_core.ncbi_taxon_id = col_double(),
##   cell_suspension.estimated_cell_count = col_double(),
##   cell_suspension.provenance.schema_major_version = col_double(),
##   cell_suspension.provenance.schema_minor_version = col_double(),
##   collection_protocol.provenance.schema_major_version = col_double(),
##   collection_protocol.provenance.schema_minor_version = col_double(),
##   dissociation_protocol.provenance.schema_major_version = col_double(),
##   dissociation_protocol.provenance.schema_minor_version = col_double(),
##   donor_organism.biomaterial_core.ncbi_taxon_id = col_double(),
##   donor_organism.provenance.schema_major_version = col_double(),
##   donor_organism.provenance.schema_minor_version = col_double(),
##   enrichment_protocol.maximum_size = col_double(),
##   enrichment_protocol.provenance.schema_major_version = col_double(),
##   enrichment_protocol.provenance.schema_minor_version = col_double(),
##   file_format = col_logical(),
##   file_size = col_double(),
##   file_version = col_datetime(format = ""),
##   library_preparation_protocol.cell_barcode.barcode_length = col_double(),
##   library_preparation_protocol.cell_barcode.barcode_offset = col_double()
##   # ... with 17 more columns
## )
## ℹ Use `spec()` for the full column specifications.
# Distinct combinations of BioSamples accession, file base name, organ part,
# biomaterial id and cell label taken from the HCA metadata.
# NOTE(review): "_S[0-9]{1}_" only matches a single-digit index; a file name
# containing a two-digit index (e.g. "_S10_") would be left untrimmed —
# confirm that none exist.
cell_metadata_hca %>%
    dplyr::transmute(
        sample_accession = `specimen_from_organism.biomaterial_core.biosamples_accession`,
        # strip the "_S<digit>_..." tail of the file name
        base_name = stringr::str_remove(file_name, "_S[0-9]{1}_.+$"),
        sample_description = `specimen_from_organism.organ_parts.text`,
        sample_biomaterial_id = `specimen_from_organism.biomaterial_core.biomaterial_id`,
        # keep only the text before the first space of the description
        cell = stringr::str_remove(
            `cell_suspension.biomaterial_core.biomaterial_description`,
            " .+$"
        )
    ) %>%
    unique()


Load embedding released along with the publication (extracted from h5ad file hosted at the HCA portal).

# Read the per-cell embedding and annotation table extracted from the
# published h5ad file.
embedding_predefined <- readr::read_csv(
    file = file.path(
        PROJECT_DIR,
        "raw/public/PRJEB39602/ad98d3cd-26fb-4ee3-99c9-8a2ab085e737.hca_heart_global_ctl200723_freeze",
        "embedding_global_raw.gz"
    )
) %>%
    # The first column has no header. readr < 2.0 fills in "X1", readr >= 2.0
    # fills in "...1". Renaming by position keeps the "X1" references further
    # down working with either version (a no-op when it is already "X1").
    dplyr::rename(X1 = 1)
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   n_counts = col_double(),
##   n_genes = col_double(),
##   percent_mito = col_double(),
##   percent_ribo = col_double(),
##   scrublet_score = col_double(),
##   x_umap = col_double(),
##   y_umap = col_double(),
##   x_pca = col_double(),
##   y_pca = col_double()
## )
## ℹ Use `spec()` for the full column specifications.

ID, matching downloaded files

Match downloaded bam files to the ids used in the h5ad file provided on the HCA portal.

# Hand-curated lookup table, one row per library:
#   base_name     - identifier derived from the downloaded bam file name; a
#                   "_1"/"_2"/"_3" suffix appears where several libraries
#                   share the same file identifier
#   predefined_id - the matching value of the `sample` column in the published
#                   embedding table (joined on below)
cell_ids <- tibble::tribble(
    ~base_name, ~predefined_id,
    "5f1b169afe9c934c8b836260", "H0037_Apex",
    "5f1b169afe9c934c8b83625f", "H0025_RV",
    "5f1b169afe9c934c8b83625e", "H0020_septum",
    "5f1b169afe9c934c8b83625d", "H0035_LV",
    "5f1b169afe9c934c8b83625c", "H0026_RA",
    "5f1b169afe9c934c8b83625b", "H0015_LV",
    "5f1b169afe9c934c8b83625a", "H0035_LA",
    "5f1b169afe9c934c8b836259", "H0037_RA_corr",
    "5f1b169afe9c934c8b836258", "H0026_RV",
    "5f1b169afe9c934c8b836257", "H0037_RV",
    "5f1b169afe9c934c8b836256", "H0037_septum",
    "5f1b169afe9c934c8b836255", "H0037_LA_corr",
    "5f1b169afe9c934c8b836254", "H0015_RV",
    "5f1b1699fe9c934c8b836253", "H0020_RA",
    "5f1b1699fe9c934c8b836252", "H0035_septum",
    "5f1b168ffe9c934c8b8361f5", "HCAHeart7836682",
    "5f1b1691fe9c934c8b8361fc_1", "HCAHeart7656539",
    "5f1b1691fe9c934c8b8361fc_2", "HCAHeart7702880",
    "5f1b1696fe9c934c8b83622c", "HCAHeart8287125",
    "5f1b1694fe9c934c8b836222", "HCAHeart8287124",
    "5f1b1698fe9c934c8b836241", "H0015_RA",
    "5f1b1699fe9c934c8b836251", "H0026_LA",
    "5f1b1698fe9c934c8b836245", "H0020_LA_new",
    "5f1b1694fe9c934c8b83621a", "HCAHeart7880863",
    "5f1b1698fe9c934c8b836240", "H0035_RA",
    "5f1b1697fe9c934c8b836237", "H0026_septum2",
    "5f1b1693fe9c934c8b836213", "HCAHeart7833853",
    "5f1b1698fe9c934c8b83623e", "HCAHeart8287128",
    "5f1b1694fe9c934c8b836221", "HCAHeart8287123",
    "5f1b1699fe9c934c8b836248", "H0020_LV",
    "5f1b1697fe9c934c8b836234", "H0020_apex",
    "5f1b1695fe9c934c8b836227", "HCAHeart7888926",
    "5f1b1692fe9c934c8b83620a", "HCAHeart7888922",
    "5f1b168ffe9c934c8b8361e8_1", "HCAHeart7664654",
    "5f1b168ffe9c934c8b8361e8_2", "HCAHeart7757636",
    "5f1b168ffe9c934c8b8361e8_3", "HCAHeart7985086",
    "5f1b168ffe9c934c8b8361ea", "HCAHeart7833852",
    "5f1b1699fe9c934c8b83624e", "H0025_apex",
    "5f1b1697fe9c934c8b83623c", "H0035_RV",
    "5f1b168ffe9c934c8b8361e5_1", "HCAHeart7829976",
    "5f1b168ffe9c934c8b8361e5_2", "HCAHeart7985089",
    "5f1b168ffe9c934c8b8361e7", "HCAHeart7836683",
    "5f1b1695fe9c934c8b83622a", "H0015_septum",
    "5f1b1699fe9c934c8b83624f", "H0025_LA",
    "5f1b168efe9c934c8b8361db_1", "HCAHeart7702881",
    "5f1b168efe9c934c8b8361db_2", "HCAHeart7702882",
    "5f1b1694fe9c934c8b836220", "HCAHeart7833854",
    "5f1b168ffe9c934c8b8361f3", "HCAHeart7757639",
    "5f1b168efe9c934c8b8361e0", "HCAHeart7964513",
    "5f1b168efe9c934c8b8361e2_1", "HCAHeart7664653",
    "5f1b168efe9c934c8b8361e2_2", "HCAHeart7702873",
    "5f1b1694fe9c934c8b836218", "HCAHeart7880861",
    "5f1b1699fe9c934c8b83624b", "HCAHeart8287126",
    "5f1b1698fe9c934c8b836243", "H0037_LV",
    "5f1b1692fe9c934c8b836207", "HCAHeart7829979",
    "5f1b168ffe9c934c8b8361e4", "HCAHeart7698016",
    "5f1b1699fe9c934c8b83624c", "H0035_apex",
    "5f1b168efe9c934c8b8361e1_1", "HCAHeart7757638",
    "5f1b168efe9c934c8b8361e1_2", "HCAHeart7985088",
    "5f1b1697fe9c934c8b83623a", "H0015_LA_new",
    "5f1b168ffe9c934c8b8361eb_1", "HCAHeart7702874",
    "5f1b168ffe9c934c8b8361eb_2", "HCAHeart7702875",
    "5f1b168ffe9c934c8b8361f1", "HCAHeart7835149",
    "5f1b168efe9c934c8b8361dd", "HCAHeart7698017",
    "5f1b1691fe9c934c8b8361f8", "HCAHeart7888927",
    "5f1b168efe9c934c8b8361d9", "HCAHeart7829977",
    "5f1b1696fe9c934c8b836232", "H0025_LV",
    "5f1b1691fe9c934c8b836203", "HCAHeart7888929",
    "5f1b1690fe9c934c8b8361f6", "HCAHeart7836681",
    "5f1b1695fe9c934c8b836226", "HCAHeart7888925",
    "5f1b1699fe9c934c8b83624a", "H0026_LV_V3",
    "5f1b168ffe9c934c8b8361f0_1", "HCAHeart7664652",
    "5f1b168ffe9c934c8b8361f0_2", "HCAHeart7698015",
    "5f1b1691fe9c934c8b8361fe", "HCAHeart7880860",
    "5f1b1699fe9c934c8b83624d", "H0015_apex",
    "5f1b168ffe9c934c8b8361ed_1", "HCAHeart7702878",
    "5f1b168ffe9c934c8b8361ed_2", "HCAHeart7702879",
    "5f1b168ffe9c934c8b8361e3", "HCAHeart7833855",
    "5f1b168ffe9c934c8b8361ef_1", "HCAHeart7702876",
    "5f1b168ffe9c934c8b8361ef_2", "HCAHeart7702877",
    "5f1b168ffe9c934c8b8361f2", "HCAHeart7835148",
    "5f1b1692fe9c934c8b836210", "HCAHeart7888924",
    "5f1b168efe9c934c8b8361de_1", "HCAHeart7757637",
    "5f1b168efe9c934c8b8361de_2", "HCAHeart7985087",
    "5f1b1696fe9c934c8b83622e", "H0026_apex",
    "5f1b1693fe9c934c8b836217", "HCAHeart7888923",
    "5f1b1697fe9c934c8b83623d", "HCAHeart7888928",
    "5f1b1697fe9c934c8b836235", "H0020_RV",
    "5f1b1698fe9c934c8b83623f", "H0025_RA",
    "5f1b1697fe9c934c8b836236", "HCAHeart7836684",
    "5f1b1698fe9c934c8b836242", "H0025_septum",
    "5f1b1691fe9c934c8b8361fb", "HCAHeart7880862",
    "5f1b1698fe9c934c8b836247", "HCAHeart8287127",
    "5f1b1692fe9c934c8b836209", "HCAHeart7829978",
    "5f1b1695fe9c934c8b836229", "HCAHeart8102865",
    "5f1b1695fe9c934c8b836228", "HCAHeart7850548",
    "5f1b1692fe9c934c8b83620c", "HCAHeart7844004",
    "5f1b1694fe9c934c8b83621f", "HCAHeart7905331",
    "5f1b1694fe9c934c8b83621b", "HCAHeart7728605",
    "5f1b1695fe9c934c8b836223", "HCAHeart8102866",
    "5f1b1694fe9c934c8b836219", "HCAHeart7850545",
    "5f1b1696fe9c934c8b83622f", "HCAHeart7844000",
    "5f1b1692fe9c934c8b836211", "HCAHeart7843999",
    "5f1b1694fe9c934c8b83621e", "HCAHeart7844002",
    "5f1b1692fe9c934c8b83620e", "HCAHeart7850539",
    "5f1b1692fe9c934c8b83620b", "HCAHeart7728607",
    "5f1b168ffe9c934c8b8361e6", "HCAHeart7656538",
    "5f1b1691fe9c934c8b836202", "HCAHeart7905329",
    "5f1b1691fe9c934c8b8361ff", "HCAHeart8102868",
    "5f1b1691fe9c934c8b8361fd", "HCAHeart7745969",
    "5f1b1696fe9c934c8b83622d", "HCAHeart8102861",
    "5f1b168efe9c934c8b8361df", "HCAHeart7728609",
    "5f1b1691fe9c934c8b836205", "HCAHeart7905327",
    "5f1b168efe9c934c8b8361da", "HCAHeart7656534",
    "5f1b1691fe9c934c8b836204", "HCAHeart7745966",
    "5f1b168ffe9c934c8b8361ec", "HCAHeart7606896",
    "5f1b1695fe9c934c8b836225", "HCAHeart7850541",
    "5f1b1693fe9c934c8b836214", "HCAHeart8102863",
    "5f1b1693fe9c934c8b836216", "HCAHeart7850542",
    "5f1b1691fe9c934c8b836201", "HCAHeart7844001",
    "5f1b1694fe9c934c8b83621d", "HCAHeart7850543",
    "5f1b1696fe9c934c8b836231", "HCAHeart7905330",
    "5f1b1697fe9c934c8b83623b", "HCAHeart7850547",
    "5f1b1691fe9c934c8b8361f7", "HCAHeart7728604",
    "5f1b168ffe9c934c8b8361f4", "HCAHeart7728606",
    "5f1b1691fe9c934c8b836200", "HCAHeart7745970",
    "5f1b168ffe9c934c8b8361e9", "HCAHeart7656536",
    "5f1b1692fe9c934c8b836208", "HCAHeart7850549",
    "5f1b168ffe9c934c8b8361ee", "HCAHeart7656537",
    "5f1b1695fe9c934c8b836224", "HCAHeart7905328",
    "5f1b1694fe9c934c8b83621c", "HCAHeart7850540",
    "5f1b1691fe9c934c8b8361f9", "HCAHeart7905332",
    "5f1b168efe9c934c8b8361dc", "HCAHeart7656535",
    "5f1b1691fe9c934c8b8361fa", "HCAHeart7745967",
    "5f1b1692fe9c934c8b836206", "HCAHeart7728608",
    "5f1b1695fe9c934c8b83622b", "HCAHeart7850544",
    "5f1b1696fe9c934c8b836230", "HCAHeart8102864",
    "5f1b1698fe9c934c8b836244", "HCAHeart8102867",
    "5f1b1696fe9c934c8b836233", "HCAHeart8102862",
    "5f1b1692fe9c934c8b83620d", "HCAHeart7751845",
    "5f1b1693fe9c934c8b836215", "HCAHeart8102858",
    "5f1b1697fe9c934c8b836238", "HCAHeart7844003",
    "5f1b1697fe9c934c8b836239", "HCAHeart8102860",
    "5f1b1692fe9c934c8b83620f", "HCAHeart7745968",
    "5f1b1699fe9c934c8b836249", "HCAHeart7850551",
    "5f1b1698fe9c934c8b836246", "HCAHeart8102857",
    "5f1b1692fe9c934c8b836212", "HCAHeart7850546",
    "5f1b1699fe9c934c8b836250", "HCAHeart8102859"
)


Cross-reference single cell/nucleus identity and metadata (EBI, HCA metadata, HCA h5ad).

# Build one row per published cell: map its `sample` to the downloaded file's
# base name, rebuild the cell id used in the re-mapped matrix
# ("<base_name>_<16-letter barcode>"), and attach the matching row of
# adata$obs.
cells_predefined <- embedding_predefined %>%
    dplyr::select(X1, sample) %>%
    dplyr::left_join(cell_ids, by = c("sample" = "predefined_id")) %>%
    dplyr::mutate(
        cell = str_c(base_name, str_extract(X1, "[A-Z]{16}"), sep = "_")
    ) %>%
    dplyr::left_join(
        adata$obs %>%
            as.data.frame() %>%
            tibble::rownames_to_column(var = "cell"),
        # explicit key: do not let the join pick up any other column that
        # happens to share a name
        by = "cell"
    )

# Append the cross-referenced columns to the published embedding table.
# Explicit keys keep the join stable even if both tables later share further
# column names.
embedding_predefined <- embedding_predefined %>%
    dplyr::left_join(cells_predefined, by = c("X1", "sample"))
# Show the distinct combinations of the sample-level annotation columns.
embedding_predefined %>%
    dplyr::select(dplyr::all_of(c(
        "NRP", "age_group", "cell_source", "donor", "gender",
        "region", "sample", "source", "type", "version"
    ))) %>%
    unique()


Sample: SAMEA7249652 (CD45+ cells from specimen CBTM-473C_RA) not included in the publication. https://www.ebi.ac.uk/ena/browser/view/ERS5009270

Biomaterial Id: CBTM-473C_RA_cd45_pos

HCA_Biomaterial_UUID: 2e030e2a-3df4-4eec-8513-8833819bece5

Library name: CBTM-473C_RA_cd45_pos


Summary

Cell types, based on the publication

# Per published cell type: number of cells and median UMI / feature counts,
# both from the published table (n_counts, n_genes) and from the num_umis /
# num_features columns attached by the cross-reference join.
embedding_predefined %>%
    dplyr::group_by(cell_type) %>%
    dplyr::summarize(
        num_cell = dplyr::n(),
        median_umi_reported = median(n_counts),
        median_features_reported = median(n_genes),
        median_umi = median(num_umis),
        median_features = median(num_features)
    ) %>%
    gt::gt() %>%
    # "medium" is the CSS font-size keyword; "median" is not a valid size
    gt::tab_options(table.font.size = "medium")
cell_type num_cell median_umi_reported median_features_reported median_umi median_features
Adipocytes 3799 3875.0 1707.0 3764.0 1682.0
Atrial_Cardiomyocyte 23483 2868.0 1399.0 2792.0 1358.0
doublets 623 688.0 484.0 709.0 493.0
Endothelial 100579 1585.0 909.0 1690.0 959.0
Fibroblast 59341 1195.0 757.0 1196.0 758.0
Lymphoid 17217 1552.0 831.0 1560.0 845.0
Mesothelial 718 1967.0 1082.5 2022.5 1082.0
Myeloid 23028 1655.5 896.0 1665.5 913.0
Neuronal 3961 1039.0 716.0 997.0 693.0
NotAssigned 33998 2186.0 1230.0 2215.0 1250.5
Pericytes 77856 1321.0 790.0 1337.0 806.0
Smooth_muscle_cells 16242 1831.0 988.0 1790.0 988.0
Ventricular_Cardiomyocyte 125289 3621.0 1545.0 3706.0 1588.0


Anatomical region

# Per anatomical region: number of cells and median num_umis / num_features.
embedding_predefined %>%
    dplyr::group_by(region) %>%
    dplyr::summarize(
        num_cell = dplyr::n(),
        median_umi = median(num_umis),
        median_features = median(num_features)
    ) %>%
    gt::gt() %>%
    # "medium" is the CSS font-size keyword; "median" is not a valid size
    gt::tab_options(table.font.size = "medium")
region num_cell median_umi median_features
AX 98251 1973 1047
LA 72734 1771 978
LV 107261 2256 1166
RA 41503 1440 868
RV 84519 2308 1186
SP 81866 2015 1058


Chromium Single Cell 3’ Reagent Kit version

# Per kit version: number of cells and median UMI / feature counts, both from
# the published table (n_counts, n_genes) and from num_umis / num_features.
embedding_predefined %>%
    dplyr::group_by(version) %>%
    dplyr::summarise(
        num_cell = dplyr::n(),
        median_umi_reported = median(n_counts),
        median_features_reported = median(n_genes),
        median_umi = median(num_umis),
        median_features = median(num_features)
    ) %>%
    gt::gt() %>%
    # "medium" is the CSS font-size keyword; "median" is not a valid size
    gt::tab_options(table.font.size = "medium")
version num_cell median_umi_reported median_features_reported median_umi median_features
V2 238154 1511 837 1536 850
V3 247980 2367 1253 2468 1310


Cell source

# Per cell_source: number of cells and median num_umis / num_features.
embedding_predefined %>%
    dplyr::group_by(cell_source) %>%
    dplyr::summarise(
        num_cell = dplyr::n(),
        median_umi = median(num_umis),
        median_features = median(num_features)
    ) %>%
    gt::gt() %>%
    # "medium" is the CSS font-size keyword; "median" is not a valid size
    gt::tab_options(table.font.size = "medium")
cell_source num_cell median_umi median_features
Harvard-Nuclei 178161 2201 1236
Sanger-CD45 80597 2079 1004
Sanger-Cells 45885 3468 1590
Sanger-Nuclei 181491 1442 845


Source

# Per source: number of cells and median num_umis / num_features.
embedding_predefined %>%
    dplyr::group_by(source) %>%
    dplyr::summarise(
        num_cell = dplyr::n(),
        median_umi = median(num_umis),
        median_features = median(num_features)
    ) %>%
    gt::gt() %>%
    # "medium" is the CSS font-size keyword; "median" is not a valid size
    gt::tab_options(table.font.size = "medium")
source num_cell median_umi median_features
CD45+ 80597 2079 1004
Cells 45885 3468 1590
Nuclei 359652 1830 1034


Age group

# Per age group: number of cells and median num_umis / num_features.
embedding_predefined %>%
    dplyr::group_by(age_group) %>%
    dplyr::summarise(
        num_cell = dplyr::n(),
        median_umi = median(num_umis),
        median_features = median(num_features)
    ) %>%
    gt::gt() %>%
    # "medium" is the CSS font-size keyword; "median" is not a valid size
    gt::tab_options(table.font.size = "medium")
age_group num_cell median_umi median_features
40-45 26934 3050 1544
45-50 32336 2192 1232
50-55 108472 2163 1196
55-60 52838 1437 863
60-65 128207 2136 1079
65-70 108449 1930 983
70-75 28898 1162 702


Donor

# Per donor: number of cells and median num_umis / num_features.
embedding_predefined %>%
    dplyr::group_by(donor) %>%
    dplyr::summarise(
        num_cell = dplyr::n(),
        median_umi = median(num_umis),
        median_features = median(num_features)
    ) %>%
    gt::gt() %>%
    # "medium" is the CSS font-size keyword; "median" is not a valid size
    gt::tab_options(table.font.size = "medium")
donor num_cell median_umi median_features
D1 15397 1997.0 895
D11 48930 2909.5 1325
D2 43143 1414.0 845
D3 27022 1217.0 757
D4 28898 1162.0 702
D5 28799 1416.0 759
D6 79650 2097.0 1068
D7 36134 2182.5 1074
H2 22216 1841.0 1107
H3 32458 2118.0 1227
H4 25816 1648.0 974
H5 38401 2482.0 1337
H6 26934 3050.0 1544
H7 32336 2192.0 1232


Type: Donation after Circulatory Death (DCD), Donation after Brain Death (DBD)

# Per donation type (DBD / DCD): number of cells and median num_umis /
# num_features.
embedding_predefined %>%
    dplyr::group_by(type) %>%
    dplyr::summarise(
        num_cell = dplyr::n(),
        median_umi = median(num_umis),
        median_features = median(num_features)
    ) %>%
    gt::gt() %>%
    # "medium" is the CSS font-size keyword; "median" is not a valid size
    gt::tab_options(table.font.size = "medium")
type num_cell median_umi median_features
DBD 220580 2076 1157
DCD 265554 1943 1000


Adult human heart’s cellular landscape

Pre-defined

Cell embedding and annotation are extracted from h5ad file provided on the HCA portal.

# Prefix prepended to the label of the embedding panels drawn below.
EMBEDDING_TITLE_PREFIX <- "UMAP"
# Tag naming which embedding is being plotted.
# NOTE(review): not referenced again in the visible part of the document.
embedding_type <- "predefined"

Cell type & region

GEOM_POINT_SIZE <- 0.1

# Draw the predefined UMAP coloured by one categorical column of
# `embedding_predefined`.
#
# column:       name (string) of the column used for colouring.
# title_suffix: text appended to EMBEDDING_TITLE_PREFIX in the panel label.
# ...:          extra arguments forwarded unchanged to plot_embedding()
#               (e.g. rasterise, legend_ncol).
# recolor:      if TRUE, replace the colour scale with one default ggplot2 hue
#               per distinct value of the column.
#
# Returns the plot with theme_customized() applied.
plot_predefined_category <- function(column, title_suffix, ..., recolor = TRUE) {
    values <- embedding_predefined[[column]]

    p <- plot_embedding(
        embedding = embedding_predefined[, c("x_umap", "y_umap")],
        color_values = as.factor(values),
        label = paste0(EMBEDDING_TITLE_PREFIX, "; ", title_suffix),
        label_position = NULL,
        show_color_value_labels = FALSE,
        show_color_legend = TRUE,
        geom_point_size = GEOM_POINT_SIZE,
        sort_values = FALSE,
        ...
    )

    if (recolor) {
        p <- p +
            scale_color_manual(
                values = scales::hue_pal()(length(unique(values)))
            )
    }

    p + theme_customized()
}

# NOTE(review): the cell-type panel is the only one drawn with
# rasterise = FALSE and without the manual hue palette; both are kept as they
# were — confirm this is intentional.
p_embedding_predefined_cell_type <- plot_predefined_category(
    "cell_type", "Cell type",
    rasterise = FALSE, recolor = FALSE
)

p_embedding_predefined_region <- plot_predefined_category(
    "region", "Region",
    rasterise = TRUE, legend_ncol = 2
)

p_embedding_predefined_donor <- plot_predefined_category(
    "donor", "Donor",
    rasterise = TRUE, legend_ncol = 2
)

p_embedding_predefined_version <- plot_predefined_category(
    "version", "Version",
    rasterise = TRUE
)

p_embedding_predefined_source <- plot_predefined_category(
    "source", "Source",
    rasterise = TRUE
)

p_embedding_predefined_type <- plot_predefined_category(
    "type", "Type",
    rasterise = TRUE
)

# Arrange the six panels in a three-column grid with no outer margin.
list(
    p_embedding_predefined_cell_type,
    p_embedding_predefined_region,
    p_embedding_predefined_donor,
    p_embedding_predefined_version,
    p_embedding_predefined_source,
    p_embedding_predefined_type
) %>%
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 3) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Mitochondria ratio distribution

# Range of the published mitochondria ratio.
range(embedding_predefined$percent_mito)
## [1] 0.0000000 0.1999672

# Predefined UMAP coloured by the published mitochondria ratio.
p_embedding_predefined_MT <- plot_embedding(
    embedding = embedding_predefined[, c("x_umap", "y_umap")],
    color_values = embedding_predefined$percent_mito,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; mitochondria %"),
    label_position = NULL,
    label_size = 2.5, label_hjust = 0, label_vjust = 0,
    show_color_value_labels = FALSE, show_color_legend = TRUE,
    legend_size = 3, legend_ncol = 1,
    geom_point_size = GEOM_POINT_SIZE, geom_point_alpha = 1,
    sort_values = TRUE, shuffle_values = FALSE,
    rasterise = TRUE, dpi = 600
) +
    theme_customized(
        x = 0.035, y = 0.995,
        legend_key_size = 1.5, legend_text_size = 4,
        border_color = "#b3b3b3", strip_color = "#b3b3b3"
    )

# Same embedding with cells at or above the threshold in salmon and all other
# cells in grey70; the label carries the threshold and the number of such cells.
MT_RATIO_THRESHOLD <- 0.15
p_embedding_predefined_MT2 <- plot_embedding(
    embedding = embedding_predefined[, c("x_umap", "y_umap")],
    color_values = as.factor(
        as.numeric(embedding_predefined$percent_mito >= MT_RATIO_THRESHOLD)
    ),
    label = paste(
        "UMAP", MT_RATIO_THRESHOLD,
        sum(embedding_predefined$percent_mito >= MT_RATIO_THRESHOLD),
        sep = "; "
    ),
    label_position = NULL,
    show_color_value_labels = FALSE, show_color_legend = FALSE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    rasterise = TRUE
) +
    scale_color_manual(values = c("grey70", "salmon")) +
    theme_customized()


The mitochondria ratios are extracted from the h5ad file downloaded from the HCA portal. There are 0 cells with a mitochondria ratio higher than 0.2.

# Continuous and thresholded mitochondria panels side by side, no outer margin.
(p_embedding_predefined_MT + p_embedding_predefined_MT2) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(theme = theme(plot.margin = margin()))

# Per source: number of cells and median published mitochondria ratio.
embedding_predefined %>%
    dplyr::group_by(source) %>%
    dplyr::summarise(
        num_cells = dplyr::n(),
        median_mt_ratio_reported = median(percent_mito)
    ) %>%
    gt::gt() %>%
    # "medium" is the CSS font-size keyword; "median" is not a valid size
    gt::tab_options(table.font.size = "medium")
source num_cells median_mt_ratio_reported
CD45+ 80597 0.046094750
Cells 45885 0.072591010
Nuclei 359652 0.001283697


Doublets and unassigned cells

# One panel per label ("doublets", "NotAssigned"): cells whose cell_type equals
# the label are coloured salmon, all other cells grey70. The panel label
# carries the number of matching cells.
purrr::map2(
    list("doublets", "NotAssigned"),
    c(0.15, 0.15),
    function(target_type, point_size) {
        is_target <- embedding_predefined$cell_type == target_type

        plot_embedding(
            embedding = embedding_predefined[, c("x_umap", "y_umap")],
            color_values = as.factor(as.numeric(is_target)),
            label = paste("UMAP", target_type, sum(is_target), sep = "; "),
            label_position = NULL,
            show_color_value_labels = FALSE, show_color_legend = FALSE,
            geom_point_size = point_size,
            sort_values = TRUE,
            rasterise = TRUE
        ) +
            scale_color_manual(values = c("grey70", "salmon")) +
            theme_customized()
    }
) %>%
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(theme = theme(plot.margin = margin()))

# Predefined UMAP coloured by the published scrublet score.
p_embedding_predefined_scrublet <- plot_embedding(
    embedding = embedding_predefined[, c("x_umap", "y_umap")],
    color_values = embedding_predefined$scrublet_score,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Scrublet_score"),
    label_position = NULL,
    label_size = 2.5, label_hjust = 0, label_vjust = 0,
    show_color_value_labels = FALSE, show_color_legend = TRUE,
    legend_size = 3, legend_ncol = 1,
    geom_point_size = GEOM_POINT_SIZE, geom_point_alpha = 1,
    sort_values = TRUE, shuffle_values = FALSE,
    rasterise = TRUE, dpi = 600
) +
    theme_customized(
        x = 0.035, y = 0.995,
        legend_key_size = 1.5, legend_text_size = 4,
        border_color = "#b3b3b3", strip_color = "#b3b3b3"
    )

# Same embedding with cells at or above the threshold in salmon and all other
# cells in grey70; the label carries the threshold and the number of such cells.
SCRUBLET_SCORE_THRESHOLD <- 0.3
p_embedding_predefined_scrublet2 <- plot_embedding(
    embedding = embedding_predefined[, c("x_umap", "y_umap")],
    color_values = as.factor(
        as.numeric(
            embedding_predefined$scrublet_score >= SCRUBLET_SCORE_THRESHOLD
        )
    ),
    label = paste(
        "UMAP", SCRUBLET_SCORE_THRESHOLD,
        sum(embedding_predefined$scrublet_score >= SCRUBLET_SCORE_THRESHOLD),
        sep = "; "
    ),
    label_position = NULL,
    show_color_value_labels = FALSE, show_color_legend = FALSE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    rasterise = TRUE
) +
    scale_color_manual(values = c("grey70", "salmon")) +
    theme_customized()


The scrublet scores are extracted from the h5ad file downloaded from the HCA portal. There are 0 cells with scrublet scores higher than 0.3.

# Continuous and thresholded scrublet panels side by side, no outer margin.
(p_embedding_predefined_scrublet + p_embedding_predefined_scrublet2) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(theme = theme(plot.margin = margin()))


The distribution of scrublet scores for cells annotated as “doublets” and “NotAssigned” in the publication.

# For cells labelled "doublets" or "NotAssigned": number of cells and median
# published scrublet score and mitochondria ratio.
embedding_predefined %>%
    dplyr::filter(cell_type %in% c("doublets", "NotAssigned")) %>%
    dplyr::group_by(cell_type) %>%
    dplyr::summarise(
        num_cells = dplyr::n(),
        median_scrublet_score_reported = median(scrublet_score),
        median_mt_ratio_reported = median(percent_mito)
    ) %>%
    gt::gt() %>%
    # "medium" is the CSS font-size keyword; "median" is not a valid size
    gt::tab_options(table.font.size = "medium")
cell_type num_cells median_scrublet_score_reported median_mt_ratio_reported
doublets 623 0.1254753 0.007220217
NotAssigned 33998 0.1471215 0.011605416


Inconsistent cells

Cells with zero UMI counts after re-mapping (num_umis == 0).

# Cells whose num_umis is exactly zero: how many there are, and the median
# UMI / feature counts the published table reports for them.
embedding_predefined %>%
    dplyr::filter(num_umis == 0) %>%
    dplyr::summarise(
        num_cells = dplyr::n(),
        median_umis_reported = median(n_counts),
        median_features_reported = median(n_genes)
    ) %>%
    gt::gt() %>%
    # "medium" is the CSS font-size keyword; "median" is not a valid size
    gt::tab_options(table.font.size = "medium")
num_cells median_umis_reported median_features_reported
4065 2681 1489


# Predefined UMAP with the zero-num_umis cells in salmon and all other cells
# in grey70; the label carries the number of such cells.
plot_embedding(
    embedding = embedding_predefined[, c("x_umap", "y_umap")],
    color_values = as.factor(as.numeric(embedding_predefined$num_umis == 0)),
    label = paste("UMAP", sum(embedding_predefined$num_umis == 0), sep = "; "),
    label_position = NULL,
    show_color_value_labels = FALSE, show_color_legend = FALSE,
    geom_point_size = 0.3,
    sort_values = TRUE,
    rasterise = TRUE
) +
    scale_color_manual(values = c("grey70", "salmon")) +
    theme_customized()

Cell group

# Cell-state labels grouped by compartment. The regularly numbered series are
# generated; the irregular ones are spelled out.
cells_selected_atrial <- paste0("aCM", 1:5)

cells_selected_ventricular <- paste0("vCM", 1:5)

cells_selected_vascular <- c(
    # endothelial
    "EC1_cap", "EC10_CMC-like", "EC2_cap", "EC3_cap", "EC4_immune",
    "EC5_art", "EC6_ven", "EC7_atria", "EC8_ln", "EC9_FB-like",
    # mesothelial
    "Meso",
    # pericytes
    "PC1_vent", "PC2_atria", "PC3_str", "PC4_CMC-like",
    # smooth muscle
    "SMC1_basic", "SMC2_art"
)

cells_selected_fibroblast <- paste0("FB", 1:7)

cells_selected_immune <- c(
    "B_cells", "CD14+Mo", "CD16+Mo",
    "CD4+T_cytox", "CD4+T_tem", "CD8+T_cytox", "CD8+T_tem",
    "DC", "DOCK4+MØ1", "DOCK4+MØ2", "doublets", "IL17RA+Mo",
    "LYVE1+MØ1", "LYVE1+MØ2", "LYVE1+MØ3",
    "Mast", "MØ_AgP", "MØ_mod", "Mo_pi",
    "NK", "NKT", "NØ"
)

cells_selected_adipocyte <- paste0("Adip", 1:4)

cells_selected_neuronal <- paste0("NC", 1:6)

# Named list of the groups, in the order the panels are drawn.
cells_selected <- list(
    atrial = cells_selected_atrial,
    ventricular = cells_selected_ventricular,
    vascular = cells_selected_vascular,
    fibroblast = cells_selected_fibroblast,
    immune = cells_selected_immune,
    adipocyte = cells_selected_adipocyte,
    neuronal = cells_selected_neuronal
)
GEOM_POINT_SIZE <- 0.1

# Whether each group's panel draws a colour legend. Keyed by group name rather
# than by position, so the flags cannot drift out of step with the order of
# `cells_selected`. NOTE(review): the two groups without a legend are the ones
# with the most states (17 and 22) -- presumably the legend would not fit.
show_legend_by_group <- c(
    atrial = TRUE,
    ventricular = TRUE,
    vascular = FALSE,
    fibroblast = TRUE,
    immune = FALSE,
    adipocyte = TRUE,
    neuronal = TRUE
)
stopifnot(all(names(cells_selected) %in% names(show_legend_by_group)))

# The label anchor of a cell state does not depend on the group being drawn,
# so compute the anchors once for all states instead of once per panel.
cell_state_label_anchors <- get_middle_points(
    embedding = embedding_predefined[, c("x_umap", "y_umap", "cell_states")],
    x = "x_umap",
    y = "y_umap",
    group = "cell_states"
)

# One UMAP panel per group: states of the group are coloured and labelled,
# every other cell is collapsed into a grey "Other" level.
purrr::map(names(cells_selected), function(x) {
    cells_selected_in_group <- cells_selected[[x]]

    # "Other" is grey; each state of the group gets its own hue.
    color_palette_cell_state <- setNames(
        object = c("grey70", scales::hue_pal()(length(cells_selected_in_group))),
        nm = c("Other", cells_selected_in_group)
    )

    cell_type_labels <- cell_state_label_anchors %>%
        filter(cell_states %in% cells_selected_in_group)

    plot_embedding(
        embedding = embedding_predefined[, c("x_umap", "y_umap")],
        color_values = embedding_predefined %>%
            mutate(
                cell_states = case_when(
                    cell_states %in% cells_selected_in_group ~ cell_states,
                    TRUE ~ "Other"
                )
            ) %>%
            pull(cell_states) %>%
            as.factor() %>%
            # Move "Other" to the end of the factor levels.
            forcats::fct_relevel("Other", after = Inf),
        label = paste0(EMBEDDING_TITLE_PREFIX, "; ", x),
        label_position = NULL,
        show_color_value_labels = FALSE,
        show_color_legend = show_legend_by_group[[x]],
        geom_point_size = GEOM_POINT_SIZE,
        sort_values = FALSE,
        rasterise = TRUE
    ) +
        scale_color_manual(
            values = color_palette_cell_state
        ) +
        theme_customized() +
        ggrepel::geom_text_repel(
            data = cell_type_labels,
            ggplot2::aes(
                x = .data[["x_umap"]],
                y = .data[["y_umap"]],
                label = .data[["cell_states"]]
            ),
            color = "black",
            size = 1.8,
            family = "Arial",
            #
            box.padding = 0.4,
            point.padding = 1e-06,
            min.segment.length = 0,
            arrow = ggplot2::arrow(length = unit(0.015, "npc")),
            max.overlaps = Inf,
            nudge_x = 0,
            nudge_y = 0,
            #
            segment.color = "grey35",
            # segment.size = 0.25,
            segment.size = 0.1,
            segment.alpha = 1,
            # segment.inflect = TRUE,
            # Fixed seed so label placement is reproducible between renders.
            seed = 20201121
        )
}) %>%
    # Combine the panels into one patchwork laid out in three columns.
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 3) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

# Per-cell-state summary: number of cells and the medians of UMI count,
# detected features, scrublet score and mitochondrial ratio, rendered as a
# gt table.
embedding_predefined %>%
    dplyr::group_by(cell_states) %>%
    dplyr::summarize(
        num_cell = n(),
        median_umi = median(num_umis),
        median_features = median(num_features),
        median_scrublet_score_reported = median(scrublet_score),
        median_mt_ratio_reported = median(percent_mito)
    ) %>%
    gt::gt() %>%
    # "medium" is the CSS size keyword; "median" is not a valid font size.
    gt::tab_options(table.font.size = "medium")
cell_states num_cell median_umi median_features median_scrublet_score_reported median_mt_ratio_reported
aCM1 12885 2547.0 1246.0 0.09822866 0.0008253095
aCM2 4733 3061.0 1534.0 0.10299003 0.0007209805
aCM3 4096 3005.5 1344.0 0.09822866 0.0007457121
aCM4 1581 3088.0 1478.0 0.10806175 0.0008806693
aCM5 188 3362.5 1616.5 0.16473318 0.0009240758
Adip1 2274 2869.5 1367.0 0.09822866 0.0005963037
Adip2 991 8433.0 3615.0 0.18575064 0.0009694193
Adip3 290 3930.5 1742.5 0.11347518 0.0006406224
Adip4 244 1933.0 1234.0 0.15555556 0.0017569620
B_cells 1195 1873.0 926.0 0.13214990 0.0224461740
CD14+Mo 1883 3458.0 1308.0 0.09822866 0.0657966300
CD16+Mo 3278 3820.0 1522.0 0.13214990 0.0436562990
CD4+T_cytox 3113 916.0 559.0 0.09822866 0.0257104190
CD4+T_tem 1047 2613.0 947.0 0.10806175 0.0313971750
CD8+T_cytox 2956 2176.0 1085.5 0.09822866 0.0205256885
CD8+T_tem 3001 1073.0 699.0 0.08177905 0.0076117980
DC 814 2411.5 1380.0 0.11926606 0.0061114308
DOCK4+MØ1 3239 902.0 641.0 0.08952959 0.0033921304
DOCK4+MØ2 1626 1256.5 844.0 0.07482993 0.0015359029
doublets 623 709.0 493.0 0.12547529 0.0072202166
EC1_cap 27417 1375.0 813.0 0.08554572 0.0384615400
EC10_CMC-like 2608 2444.5 1219.0 0.09822866 0.0378953555
EC2_cap 13445 1583.0 897.0 0.08952959 0.0343137270
EC3_cap 16897 2072.0 1089.0 0.09822866 0.0253164560
EC4_immune 5637 1545.0 886.0 0.09375000 0.0506863780
EC5_art 20317 2196.0 1178.0 0.10299003 0.0322479640
EC6_ven 8486 2114.5 1125.5 0.10299003 0.0244897960
EC7_atria 4485 1121.0 752.0 0.13934426 0.0020979021
EC8_ln 754 1536.0 997.5 0.12547529 0.0067475975
EC9_FB-like 533 2307.0 1453.0 0.16473318 0.0035375461
FB1 26632 1109.0 700.0 0.10299003 0.0020387360
FB2 13779 1202.0 777.0 0.11926606 0.0013495276
FB3 7744 1223.0 763.5 0.13214990 0.0017438692
FB4 6146 1374.5 860.0 0.11926606 0.0024545902
FB5 2518 1296.0 821.0 0.10806175 0.0028169039
FB6 1610 2685.0 1610.0 0.17475728 0.0024872102
FB7 912 3092.0 1698.5 0.17475728 0.0035532101
IL17RA+Mo 32 4715.5 2431.0 0.10823261 0.0012512804
LYVE1+MØ1 3018 2608.0 1294.5 0.07821229 0.0127186405
LYVE1+MØ2 2088 814.0 556.0 0.09375000 0.0059259390
LYVE1+MØ3 1957 1762.0 915.0 0.08952959 0.0069808030
Mast 1543 959.0 687.0 0.04864865 0.0018957346
Meso 718 2022.5 1082.0 0.13934426 0.0078040273
MØ_AgP 1278 4147.5 1505.0 0.13934426 0.0526469800
MØ_mod 1313 3057.0 1254.0 0.08952959 0.0545101840
Mo_pi 1652 1116.0 623.5 0.14712154 0.0657877400
nan 33998 2215.0 1250.5 0.14712154 0.0116054155
NC1 3129 863.0 616.0 0.07482993 0.0028328612
NC2 445 2545.0 1551.0 0.11926606 0.0029779630
NC3 147 2140.0 1189.0 0.12547529 0.0099825310
NC4 126 4615.5 2140.0 0.13934426 0.0017484877
NC5 90 2926.0 1721.5 0.04371099 0.0014946246
NC6 24 1321.0 911.0 0.16014437 0.0021411657
NK 3628 2022.5 980.5 0.06565657 0.0282486680
NKT 1463 1128.0 674.0 0.09375000 0.0327654000
NØ 121 1091.0 497.0 0.13214990 0.0732064400
PC1_vent 50095 1223.0 742.0 0.07482993 0.0026702270
PC2_atria 11323 1442.0 877.0 0.07161804 0.0017605633
PC3_str 14231 1706.0 998.0 0.11347518 0.0194489470
PC4_CMC-like 2207 2897.0 1551.0 0.15555556 0.0016936850
SMC1_basic 13114 1618.0 923.0 0.08177905 0.0031491345
SMC2_art 3128 3517.0 1501.0 0.10299003 0.0263081000
vCM1 71162 3593.0 1547.0 0.10299003 0.0006570302
vCM2 25658 4019.0 1734.0 0.10806175 0.0006337638
vCM3 20168 3514.0 1506.0 0.10806175 0.0006070417
vCM4 7651 4055.0 1657.0 0.11347518 0.0006937218
vCM5 650 3606.0 1683.5 0.16473318 0.0010016029


Expression

Embedding

# start
# Features shown on the embedding, written as "<Ensembl gene ID>_<symbol>".
# They are listed three per line, mirroring the three-column panel grid they
# are plotted in, and defined as three sets that are then concatenated.
FEATURES_SELECTED_A <- c(
    "ENSG00000092054_MYH7", "ENSG00000111245_MYL2", "ENSG00000115641_FHL2",
    "ENSG00000175206_NPPA", "ENSG00000106631_MYL7", "ENSG00000198336_MYL4",
    "ENSG00000133392_MYH11", "ENSG00000149591_TAGLN", "ENSG00000107796_ACTA2"
)

FEATURES_SELECTED_B <- c(
    "ENSG00000143248_RGS5", "ENSG00000069431_ABCC9", "ENSG00000121361_KCNJ8",
    "ENSG00000119927_GPAM", "ENSG00000169710_FASN", "ENSG00000174697_LEP",
    "ENSG00000011465_DCN", "ENSG00000148180_GSN", "ENSG00000134853_PDGFRA"
)

FEATURES_SELECTED_C <- c(
    "ENSG00000110799_VWF", "ENSG00000261371_PECAM1", "ENSG00000179776_CDH5",
    "ENSG00000123560_PLP1", "ENSG00000179915_NRXN1", "ENSG00000021645_NRXN3",
    "ENSG00000170458_CD14", "ENSG00000173372_C1QA", "ENSG00000129226_CD68",
    "ENSG00000153563_CD8A", "ENSG00000168685_IL7R", "ENSG00000102245_CD40LG",
    "ENSG00000102854_MSLN", "ENSG00000184937_WT1", "ENSG00000169594_BNC1"
)

# Full plotting order: set A, then B, then C.
FEATURES_SELECTED <- c(FEATURES_SELECTED_A, FEATURES_SELECTED_B, FEATURES_SELECTED_C)


Grey: cells that were included in the publication but have low UMI counts after re-mapping.

GEOM_POINT_SIZE <- 0.15

# One UMAP panel per selected feature, coloured by log10(CPM + 1), combined
# into a three-column patchwork.
FEATURES_SELECTED %>%
    purrr::map(function(feature) {
        # Expression of this feature for every cell in the embedding.
        expression_log10 <- log10(
            calc_cpm_anndata(
                anndata = adata,
                k = feature,
                cells = embedding_predefined$cell
            ) + 1
        )

        panel <- plot_embedding(
            embedding = embedding_predefined[, c("x_umap", "y_umap")],
            color_values = expression_log10,
            label = paste0(EMBEDDING_TITLE_PREFIX, "; ", feature),
            label_position = NULL,
            show_color_value_labels = FALSE,
            show_color_legend = TRUE,
            geom_point_size = GEOM_POINT_SIZE,
            geom_point_alpha = 1,
            sort_values = TRUE,
            shuffle_values = FALSE,
            label_size = 2.5,
            label_hjust = 0,
            label_vjust = 0,
            rasterise = TRUE,
            dpi = 600,
            legend_size = 3,
            legend_ncol = 1
        )

        panel +
            theme_customized(
                x = 0.035,
                y = 0.995,
                legend_key_size = 1.5,
                legend_text_size = 4,
                border_color = "#b3b3b3",
                strip_color = "#b3b3b3"
            )
    }) %>%
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 3) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )


Lollipop


vCM

# Features for the vCM lollipop plot; the bare `#` lines separate the sets.
FEATURES_SELECTED <- c(
    "ENSG00000115641_FHL2",
    "ENSG00000092054_MYH7",
    "ENSG00000111245_MYL2",
    "ENSG00000169851_PCDH7",
    "ENSG00000169760_NLGN1",
    #
    "ENSG00000140945_CDH13",
    "ENSG00000186314_PRELID2",
    "ENSG00000282917_AC107068.2",
    "ENSG00000249797_LINC02147",
    #
    "ENSG00000162614_NEXN",
    "ENSG00000148677_ANKRD1",
    "ENSG00000022267_FHL1",
    "ENSG00000163092_XIRP2",
    "ENSG00000100345_MYH9",
    "ENSG00000198842_DUSP27",
    "ENSG00000168334_XIRP1",
    "ENSG00000130176_CNN1",
    #
    "ENSG00000109846_CRYAB",
    "ENSG00000101608_MYL12A",
    "ENSG00000075624_ACTB",
    "ENSG00000189043_NDUFA4",
    "ENSG00000127184_COX7C",
    "ENSG00000135940_COX5B",
    "ENSG00000147123_NDUFB11",
    #
    "ENSG00000164741_DLC1",
    "ENSG00000164330_EBF1",
    "ENSG00000134532_SOX5",
    "ENSG00000221818_EBF2",
    "ENSG00000169282_KCNAB1"
)

cells_states_selected <- c(
    "vCM1", "vCM2", "vCM3", "vCM4", "vCM5"
)

# Cell identifiers of each selected state, as a list named by state.
cells_selected_lollipop <- purrr::map(
    purrr::set_names(cells_states_selected),
    function(x) {
        embedding_predefined %>%
            filter(cell_states == x) %>%
            pull(cell)
    }
)

# Feature-by-cell CPM matrix over all cells in the embedding, with missing
# values set to 0. The rows are bound in a single rbind call: this avoids
# re-copying the growing matrix once per feature and still yields a matrix
# when only one feature is selected.
matrix_cpm_subset <- do.call(
    rbind,
    purrr::map(FEATURES_SELECTED, function(x) {
        v <- calc_cpm_anndata(
            anndata = adata,
            k = x,
            cells = embedding_predefined$cell
        )
        v[is.na(v)] <- 0

        v
    })
)
rownames(matrix_cpm_subset) <- FEATURES_SELECTED


Extended Data Fig. 3a

# Lollipop plot of the selected features across the selected vCM states.
plot_lollipop(
    cells = cells_selected_lollipop, features = FEATURES_SELECTED,
    matrix_cpm = matrix_cpm_subset, color_range_limits = NULL
)

aCM

# grep(pattern = "gsn", x = rownames(adata$var), ignore.case = TRUE, value = TRUE)
# Features for the aCM lollipop plot; the bare `#` lines separate the sets.
FEATURES_SELECTED <- c(
    "ENSG00000197616_MYH6",
    "ENSG00000175206_NPPA",
    "ENSG00000198336_MYL4",
    "ENSG00000181072_CHRM2",
    "ENSG00000117114_ADGRL2",
    #
    "ENSG00000115641_FHL2",
    "ENSG00000184347_SLIT3",
    "ENSG00000105697_HAMP",
    "ENSG00000128918_ALDH1A2",
    "ENSG00000204928_GRXCR2",
    "ENSG00000162670_BRINP3",
    #
    "ENSG00000138347_MYPN",
    "ENSG00000211455_STK38L",
    "ENSG00000154553_PDLIM3",
    "ENSG00000198842_DUSP27",
    "ENSG00000100345_MYH9",
    "ENSG00000130176_CNN1",
    #
    "ENSG00000104879_CKM",
    "ENSG00000189043_NDUFA4",
    "ENSG00000131143_COX4I1",
    "ENSG00000121769_FABP3",
    #
    "ENSG00000164741_DLC1",
    "ENSG00000127472_PLA2G5",
    "ENSG00000184384_MAML2",
    "ENSG00000249669_CARMN",
    "ENSG00000148180_GSN"
)

cells_states_selected <- c(
    "aCM1", "aCM2", "aCM3", "aCM4", "aCM5"
)

# Cell identifiers of each selected state, as a list named by state.
cells_selected_lollipop <- purrr::map(
    purrr::set_names(cells_states_selected),
    function(x) {
        embedding_predefined %>%
            filter(cell_states == x) %>%
            pull(cell)
    }
)

# Feature-by-cell CPM matrix over all cells in the embedding, with missing
# values set to 0. The rows are bound in a single rbind call: this avoids
# re-copying the growing matrix once per feature and still yields a matrix
# when only one feature is selected.
matrix_cpm_subset <- do.call(
    rbind,
    purrr::map(FEATURES_SELECTED, function(x) {
        v <- calc_cpm_anndata(
            anndata = adata,
            k = x,
            cells = embedding_predefined$cell
        )
        v[is.na(v)] <- 0

        v
    })
)
rownames(matrix_cpm_subset) <- FEATURES_SELECTED


Extended Data Fig. 3b

# Lollipop plot of the selected features across the selected aCM states.
plot_lollipop(
    cells = cells_selected_lollipop, features = FEATURES_SELECTED,
    matrix_cpm = matrix_cpm_subset, color_range_limits = NULL
)

Cellular composition

# Bar plot of the cell-type composition (`percentage`) of each region.
plot_barplot(
    prepare_cluster_composition(
        embedding = embedding_predefined,
        x = region,
        group = cell_type
    ),
    x = region,
    y = percentage,
    z = cell_type,
    legend_ncol = 2
) +
    theme_bw(base_size = 6) +
    theme(legend.key.size = grid::unit(3, "mm"))



R session info

# Platform part of the session info (R version, OS, locale, time zone, date).
devtools::session_info()[["platform"]]
##  setting  value                       
##  version  R version 4.0.3 (2020-10-10)
##  os       macOS  11.2                 
##  system   x86_64, darwin20.2.0        
##  ui       unknown                     
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/Chicago             
##  date     2021-02-03
# Loaded version, date and source of every package in the session, rendered
# as a gt table. The element is spelled out in full (`packages`) instead of
# relying on `$` partial matching of `pack`.
devtools::session_info()$packages %>%
    as_tibble() %>%
    dplyr::select(
        package,
        loadedversion,
        date,
        source
    ) %>%
    # print(n = nrow(.))
    gt::gt() %>%
    # "medium" is the CSS size keyword; "median" is not a valid font size.
    gt::tab_options(table.font.size = "medium")
package loadedversion date source
assertthat 0.2.1 2019-03-21 CRAN (R 4.0.0)
backports 1.2.1 2020-12-09 CRAN (R 4.0.3)
beeswarm 0.2.3 2016-04-25 CRAN (R 4.0.0)
broom 0.7.4.9000 2021-02-02 Github (tidymodels/broom@b750e4f)
bslib 0.2.4.9000 2021-02-02 Github (rstudio/bslib@b3cd7a9)
cachem 1.0.1 2021-01-22 Github (r-lib/cachem@27c8d89)
callr 3.5.1.9000 2021-01-06 Github (r-lib/callr@743069f)
cellranger 1.1.0 2016-07-27 CRAN (R 4.0.0)
checkmate 2.0.0 2020-02-06 CRAN (R 4.0.0)
cli 2.3.0 2021-01-31 CRAN (R 4.0.3)
colorspace 2.0-0 2020-11-10 R-Forge (R 4.0.3)
crayon 1.4.0 2021-01-30 CRAN (R 4.0.3)
DBI 1.1.1 2021-01-15 CRAN (R 4.0.3)
dbplyr 2.0.0 2020-11-03 CRAN (R 4.0.3)
desc 1.2.0 2018-05-01 CRAN (R 4.0.0)
devtools 2.3.1.9000 2021-01-21 Github (r-lib/devtools@ef962e4)
digest 0.6.27 2020-10-24 CRAN (R 4.0.3)
dplyr 1.0.4.9000 2021-02-02 Github (tidyverse/dplyr@2455c77)
ellipsis 0.3.1 2020-05-15 CRAN (R 4.0.3)
evaluate 0.14 2019-05-28 CRAN (R 4.0.0)
extrafont 0.17 2014-12-08 CRAN (R 4.0.2)
extrafontdb 1.0 2012-06-11 CRAN (R 4.0.0)
fansi 0.4.2 2021-01-15 CRAN (R 4.0.3)
farver 2.0.3 2020-01-16 CRAN (R 4.0.0)
fastmap 1.1.0 2021-01-25 CRAN (R 4.0.3)
forcats 0.5.1.9000 2021-01-28 Github (tidyverse/forcats@b5fce89)
fs 1.5.0 2020-07-31 CRAN (R 4.0.3)
generics 0.1.0 2020-10-31 CRAN (R 4.0.3)
ggbeeswarm 0.6.0 2017-08-07 CRAN (R 4.0.3)
ggplot2 3.3.3.9000 2021-01-30 Github (tidyverse/ggplot2@dbd7d79)
ggrastr 0.2.1 2021-01-27 Github (VPetukhov/ggrastr@f1ce011)
glue 1.4.1.9000 2021-01-06 Github (tidyverse/glue@f0a7b2a)
gt 0.2.2 2020-11-25 Github (rstudio/gt@bae32f4)
gtable 0.3.0 2019-03-25 CRAN (R 4.0.0)
haven 2.3.1 2020-06-01 CRAN (R 4.0.0)
highr 0.8 2019-03-20 CRAN (R 4.0.0)
hms 1.0.0 2021-01-13 CRAN (R 4.0.3)
htmltools 0.5.1.9000 2021-01-23 Github (rstudio/htmltools@e7f0393)
httr 1.4.2 2020-07-20 CRAN (R 4.0.2)
jquerylib 0.1.3 2020-12-17 Github (rstudio/jquerylib@8f8e639)
jsonlite 1.7.2 2020-12-09 CRAN (R 4.0.3)
knitr 1.31.4 2021-01-29 Github (yihui/knitr@d83e8de)
labeling 0.4.2 2020-10-20 CRAN (R 4.0.3)
lattice 0.20-41 2020-04-02 CRAN (R 4.0.3)
lifecycle 0.2.0 2020-03-06 CRAN (R 4.0.0)
lubridate 1.7.9.2 2021-01-04 Github (tidyverse/lubridate@aab2e30)
magrittr 2.0.1.9000 2020-12-14 Github (tidyverse/magrittr@bb1c86a)
Matrix 1.3-2 2021-01-06 CRAN (R 4.0.3)
memoise 2.0.0 2021-01-26 CRAN (R 4.0.3)
modelr 0.1.8.9000 2021-01-23 Github (tidyverse/modelr@16168e0)
munsell 0.5.0 2018-06-12 CRAN (R 4.0.0)
patchwork 1.1.1 2020-12-17 CRAN (R 4.0.3)
pillar 1.4.99.9006 2021-02-02 Github (r-lib/pillar@c6f8311)
pkgbuild 1.2.0 2020-12-15 CRAN (R 4.0.3)
pkgconfig 2.0.3 2019-09-22 CRAN (R 4.0.0)
pkgload 1.1.0 2020-05-29 CRAN (R 4.0.0)
png 0.1-7 2013-12-03 CRAN (R 4.0.0)
prettyunits 1.1.1.9000 2020-11-23 Github (r-lib/prettyunits@b1cdad8)
processx 3.4.5 2020-11-30 CRAN (R 4.0.3)
ps 1.5.0 2020-12-05 CRAN (R 4.0.3)
purrr 0.3.4.9000 2020-11-23 Github (tidyverse/purrr@af06d45)
R6 2.5.0 2020-11-02 Github (r-lib/R6@6cf7d4e)
ragg 1.0.0.9000 2021-01-15 Github (r-lib/ragg@aebed7f)
Rcpp 1.0.6 2021-01-15 CRAN (R 4.0.3)
readr 1.4.0.9000 2021-01-23 Github (tidyverse/readr@483ca6c)
readxl 1.3.1.9000 2021-01-23 Github (tidyverse/readxl@9f85fa5)
remotes 2.2.0.9000 2021-01-25 Github (r-lib/remotes@cf2b4a9)
reprex 1.0.0 2021-01-27 CRAN (R 4.0.3)
reticulate 1.18 2020-10-25 CRAN (R 4.0.3)
rlang 0.4.10.9000 2021-02-02 Github (r-lib/rlang@d15299e)
rmarkdown 2.6.6 2021-02-02 Github (rstudio/rmarkdown@d8e7a15)
rprojroot 2.0.2 2020-11-15 CRAN (R 4.0.3)
rstudioapi 0.13.0-9000 2021-01-02 Github (rstudio/rstudioapi@4baeb39)
Rttf2pt1 1.3.8 2020-01-10 CRAN (R 4.0.0)
rvest 0.3.6 2020-07-25 CRAN (R 4.0.2)
sass 0.3.1 2021-01-24 CRAN (R 4.0.3)
scales 1.1.1 2020-05-11 CRAN (R 4.0.3)
sessioninfo 1.1.1 2018-11-05 CRAN (R 4.0.3)
stringi 1.5.3 2020-09-09 CRAN (R 4.0.2)
stringr 1.4.0.9000 2021-01-23 Github (tidyverse/stringr@1f03eb0)
styler 1.3.2.9000 2021-02-02 Github (r-lib/styler@3560b39)
systemfonts 1.0.0.9000 2021-02-02 Github (r-lib/systemfonts@d85abe2)
testthat 3.0.1 2021-01-29 Github (r-lib/testthat@b19b5ac)
textshaping 0.2.1.9000 2021-01-15 Github (r-lib/textshaping@f6f2697)
tibble 3.0.6.9000 2021-01-31 Github (tidyverse/tibble@eb99cb6)
tidyr 1.1.2.9000 2021-01-23 Github (tidyverse/tidyr@c338aa9)
tidyselect 1.1.0 2020-05-11 CRAN (R 4.0.3)
tidyverse 1.3.0.9000 2020-11-23 Github (hadley/tidyverse@8a0bb99)
usethis 2.0.0.9000 2021-01-31 Github (r-lib/usethis@716b703)
utf8 1.1.4 2018-05-24 CRAN (R 4.0.0)
vctrs 0.3.6 2020-12-17 CRAN (R 4.0.3)
vipor 0.4.5 2017-03-22 CRAN (R 4.0.0)
viridisLite 0.3.0 2018-02-01 CRAN (R 4.0.0)
withr 2.4.1 2021-01-26 CRAN (R 4.0.3)
xfun 0.20 2021-01-06 CRAN (R 4.0.3)
xml2 1.3.2 2020-04-23 CRAN (R 4.0.0)
yaml 2.2.1 2020-02-01 CRAN (R 4.0.0)